# Clean and Prep Govt Spending Revision et al Lucid Data
# Created: 01.03.2019

library(tidyr)
library(dplyr)
library(Hmisc)
library(estimatr)

# Remove every object from the workspace before the data are loaded.
# NOTE(review): rm() only removes objects; the packages attached by the
# library() calls above stay attached.
rm(list = ls())

# Placeholder path: replace with the folder that holds the .RData file below.
setwd("PATH TO DATA FOLDER")

# Restore the saved objects into the workspace. The rest of this script
# relies on this call to provide the object `x`.
load("Govt_Spend_Rev_Appended_IND_WTS.RData")

# Note: full2 = complete data, including all respondents who were successfully
# randomized but may have attrited; x = respondents who completed the survey,
# with weights

# Check Treatment Balance

# Keep only respondents flagged as randomized and complete. which() converts
# the condition to row positions and drops rows where it evaluates to NA;
# indexing with the raw logical vector would instead insert an all-NA row for
# every such respondent.
x <- x[which(x$isRandomized == 1 & x$complete == 1), ]

# One-way counts of each assignment variable.
table(x$first)
table(x$first_new)
table(x$resframe)
table(x$policy_treat)

# Debrief responses overall and by sample, with NA counted explicitly.
table(x$debrief, useNA = "always")
table(x$debrief, x$sample, useNA = "always")

# itreat / streat counts among rows with a non-missing debrief value.
table(x$itreat[!is.na(x$debrief)])
table(x$streat[!is.na(x$debrief)])

# Check treatment presentation
#
# Each call cross-tabulates the response-frame assignment (resframe) with one
# item column; useNA = "always" adds an NA row and column so missing values
# are counted. n_d / n_i and j_d / j_i are the columns combined into n_r and
# j_r further down.
# NOTE(review): the *_rem columns are not used anywhere else in the visible
# part of this script -- presumably follow-up versions of the same items;
# confirm against the survey instrument.

# Disabled positional check of columns 16-23, kept for reference:
# for (j in 16:23) {
#   print(table(x[ ,j], useNA = "always"))
# }

# Nutrition, direct version
table(x$resframe, x$n_d, useNA = "always")
table(x$resframe, x$n_d_rem, useNA = "always")

# Nutrition, indirect version
table(x$resframe, x$n_i, useNA = "always")
table(x$resframe, x$n_i_rem, useNA = "always")

# Job training, indirect version
table(x$resframe, x$j_i, useNA = "always")
table(x$resframe, x$j_i_rem, useNA = "always")

# Job training, direct version
table(x$resframe, x$j_d, useNA = "always")
table(x$resframe, x$j_d_rem, useNA = "always")

# Cross-tabulate policy_treat with each *_pro0-3 and *_reg0-3 column, first
# within rows where policy == "hm", then within rows where policy == "ui".
# useNA = "always" keeps an NA row and column in every table. If policy itself
# contains NA, those rows are selected as NA entries and land in the NA cells.
# NOTE(review): the meaning of the pro/reg suffixes and the 0-3 index is not
# visible in this script; confirm against the survey instrument.

# policy == "hm": pro columns
table(x$policy_treat[x$policy == "hm"], x$hm_pro0[x$policy == "hm"], useNA = "always")
table(x$policy_treat[x$policy == "hm"], x$hm_pro1[x$policy == "hm"], useNA = "always")
table(x$policy_treat[x$policy == "hm"], x$hm_pro2[x$policy == "hm"], useNA = "always")
table(x$policy_treat[x$policy == "hm"], x$hm_pro3[x$policy == "hm"], useNA = "always")

# policy == "hm": reg columns
table(x$policy_treat[x$policy == "hm"], x$hm_reg0[x$policy == "hm"], useNA = "always")
table(x$policy_treat[x$policy == "hm"], x$hm_reg1[x$policy == "hm"], useNA = "always")
table(x$policy_treat[x$policy == "hm"], x$hm_reg2[x$policy == "hm"], useNA = "always")
table(x$policy_treat[x$policy == "hm"], x$hm_reg3[x$policy == "hm"], useNA = "always")

# policy == "ui": pro columns
table(x$policy_treat[x$policy == "ui"], x$ui_pro0[x$policy == "ui"], useNA = "always")
table(x$policy_treat[x$policy == "ui"], x$ui_pro1[x$policy == "ui"], useNA = "always")
table(x$policy_treat[x$policy == "ui"], x$ui_pro2[x$policy == "ui"], useNA = "always")
table(x$policy_treat[x$policy == "ui"], x$ui_pro3[x$policy == "ui"], useNA = "always")

# policy == "ui": reg columns
table(x$policy_treat[x$policy == "ui"], x$ui_reg0[x$policy == "ui"], useNA = "always")
table(x$policy_treat[x$policy == "ui"], x$ui_reg1[x$policy == "ui"], useNA = "always")
table(x$policy_treat[x$policy == "ui"], x$ui_reg2[x$policy == "ui"], useNA = "always")
table(x$policy_treat[x$policy == "ui"], x$ui_reg3[x$policy == "ui"], useNA = "always")

# Check demographic pass-throughs

names(x)
# NOTE(review): columns 42:48 are selected by position -- presumably the
# pass-through fields; re-check whenever the column layout changes.
head(x[ ,42:48])

# One-way counts for each pass-through variable, with NA counted explicitly.
table(x$education_short, useNA = "always")
table(x$education_long, useNA = "always")
table(x$ethnicity, useNA = "always")
table(x$gender, useNA = "always")
table(x$hhi_lucid, useNA = "always")
table(x$hispanic, useNA = "always")
table(x$political_party_short, useNA = "always")
table(x$political_party_long, useNA = "always")
table(x$region, useNA = "always")

# Number of rows with a missing zip.
sum(is.na(x$zip))

# Number of rows whose rid is missing or blank. Comparing an NA rid with ""
# gives NA, which would turn the whole sum into NA, so NA ids are counted
# explicitly alongside the empty strings.
sum(is.na(x$rid) | x$rid == "")

#########################
# CLEAN DATA for ANALYSIS
#########################

# Re-code Treatment Indicators for N/J Programs

# For Nutrition Program
# resframe "d" becomes "direct" and "i" becomes "indirect"; rows with any
# other or missing resframe value stay NA.

table(x$resframe, useNA = "always")
n_frame_labels <- c(d = "direct", i = "indirect")
x$n_treat_v <- NA
for (frame_code in names(n_frame_labels)) {
  x$n_treat_v[x$resframe == frame_code] <- n_frame_labels[[frame_code]]
}
table(x$resframe, x$n_treat_v, useNA = "always")

# For Job Training Program (opposite to nutrition framing by design)
# The same codes map to the reversed labels.

j_frame_labels <- c(d = "indirect", i = "direct")
x$j_treat_v <- NA
for (frame_code in names(j_frame_labels)) {
  x$j_treat_v[x$resframe == frame_code] <- j_frame_labels[[frame_code]]
}
table(x$resframe, x$j_treat_v, useNA = "always")

# Create combined variable for Nutrition/Job Training primary outcome
# Nutrition
# n_r takes the n_d response on rows framed "direct" and the n_i response on
# rows framed "indirect". Rows with a missing response or framing stay NA.

table(x$n_d, useNA = "always")
table(x$n_i, useNA = "always")

x$n_r <- NA

# %in% never returns NA, so it covers the missing-framing case on its own.
n_direct_rows <- x$n_treat_v %in% "direct" & !is.na(x$n_d)
x$n_r[n_direct_rows] <- x$n_d[n_direct_rows]

n_indirect_rows <- x$n_treat_v %in% "indirect" & !is.na(x$n_i)
x$n_r[n_indirect_rows] <- x$n_i[n_indirect_rows]

# Verify the combined variable against both source columns.
table(x$n_r, useNA = "always")

table(x$n_d, useNA = "always")
table(x$n_r, x$n_d, useNA = "always")

table(x$n_i, useNA = "always")
table(x$n_r, x$n_i, useNA = "always")

# Job Training
# j_r takes the j_d response on rows framed "direct" and the j_i response on
# rows framed "indirect". Rows with a missing response or framing stay NA.

table(x$j_d, useNA = "always")
table(x$j_i, useNA = "always")

x$j_r <- NA

# %in% never returns NA, so it covers the missing-framing case on its own.
j_direct_rows <- x$j_treat_v %in% "direct" & !is.na(x$j_d)
x$j_r[j_direct_rows] <- x$j_d[j_direct_rows]

j_indirect_rows <- x$j_treat_v %in% "indirect" & !is.na(x$j_i)
x$j_r[j_indirect_rows] <- x$j_i[j_indirect_rows]

# Verify the combined variable against both source columns.
table(x$j_r, useNA = "always")

table(x$j_d, useNA = "always")
table(x$j_r, x$j_d, useNA = "always")

table(x$j_i, useNA = "always")
table(x$j_r, x$j_i, useNA = "always")

# Recode _mech responses for NUTRITION
# The four n_mech_* items are copied unchanged under descriptive names:
#   n_mech_1 -> n_need1, n_mech_2 -> n_need2,
#   n_mech_3 -> n_effort, n_mech_4 -> n_depend

x$n_need1 <- x$n_mech_1
x$n_need2 <- x$n_mech_2
x$n_effort <- x$n_mech_3
x$n_depend <- x$n_mech_4

# For each item: distribution of the source, then source against its copy.

table(x$n_mech_1, useNA = "always")
table(x$n_mech_1, x$n_need1)

table(x$n_mech_2, useNA = "always")
table(x$n_mech_2, x$n_need2)

table(x$n_mech_3, useNA = "always")
table(x$n_mech_3, x$n_effort)

table(x$n_mech_4, useNA = "always")
table(x$n_mech_4, x$n_depend)

# Recode _mech responses for JOB TRAINING
# The four j_mech_* items are copied unchanged under descriptive names:
#   j_mech_1 -> j_need1, j_mech_2 -> j_need2,
#   j_mech_3 -> j_effort, j_mech_4 -> j_depend

x$j_need1 <- x$j_mech_1
x$j_need2 <- x$j_mech_2
x$j_effort <- x$j_mech_3
x$j_depend <- x$j_mech_4

# For each item: distribution of the source, then source against its copy.

table(x$j_mech_1, useNA = "always")
table(x$j_mech_1, x$j_need1)

table(x$j_mech_2, useNA = "always")
table(x$j_mech_2, x$j_need2)

table(x$j_mech_3, useNA = "always")
table(x$j_mech_3, x$j_effort)

table(x$j_mech_4, useNA = "always")
table(x$j_mech_4, x$j_depend)

# Recode Alternative Program Questions for NUTRITION
#
# n_alt holds the nutrition alternative-program response for rows with
# first_new == "j": n_d_alt when resframe is "d", n_i_alt when resframe is
# "i". All other rows stay NA.

# which() turns each condition into row positions and drops rows where
# resframe or first_new is NA. A logical index containing NA is rejected
# ("NAs are not allowed in subscripted assignments") when the replacement
# has more than one element, so the raw condition would stop the script.
n_alt_d_rows <- which(x$resframe == "d" & x$first_new == "j")
n_alt_i_rows <- which(x$resframe == "i" & x$first_new == "j")

# Source distributions, overall and within the rows about to be copied.
table(x$n_d_alt, useNA = "always")
table(x$n_d_alt[n_alt_d_rows], useNA = "always")

table(x$n_i_alt, useNA = "always")
table(x$n_i_alt[n_alt_i_rows], useNA = "always")

x$n_alt <- NA

x$n_alt[n_alt_d_rows] <- x$n_d_alt[n_alt_d_rows]
x$n_alt[n_alt_i_rows] <- x$n_i_alt[n_alt_i_rows]

# Verify the combined variable against both source columns.
table(x$n_d_alt, x$n_alt, useNA = "always")
table(x$n_i_alt, x$n_alt, useNA = "always")

# Recode Alternative Program Questions for JOB TRAINING
#
# j_alt holds the job-training alternative-program response for rows with
# first_new == "n": j_d_alt when resframe is "i", j_i_alt when resframe is
# "d" (job-training framing is the reverse of resframe). All other rows stay
# NA.

# which() turns each condition into row positions and drops rows where
# resframe or first_new is NA. A logical index containing NA is rejected
# ("NAs are not allowed in subscripted assignments") when the replacement
# has more than one element, so the raw condition would stop the script.
j_alt_d_rows <- which(x$resframe == "i" & x$first_new == "n")
j_alt_i_rows <- which(x$resframe == "d" & x$first_new == "n")

# Source distributions, overall and within the rows about to be copied.
table(x$j_d_alt, useNA = "always")
table(x$j_d_alt[j_alt_d_rows], useNA = "always")

table(x$j_i_alt, useNA = "always")
table(x$j_i_alt[j_alt_i_rows], useNA = "always")

x$j_alt <- NA

x$j_alt[j_alt_d_rows] <- x$j_d_alt[j_alt_d_rows]
x$j_alt[j_alt_i_rows] <- x$j_i_alt[j_alt_i_rows]

# Verify the combined variable against both source columns.
table(x$j_d_alt, x$j_alt, useNA = "always")
table(x$j_i_alt, x$j_alt, useNA = "always")

# Create total diverted variable for N and J
# Each total is the element-wise sum of the _w_2, _w_3 and _w_4 columns,
# added left to right. The count printed after each total is the number of
# rows where the total is NA.

x$n_divert <- Reduce(`+`, list(x$n_w_2, x$n_w_3, x$n_w_4))
sum(is.na(x$n_divert))

x$j_divert <- Reduce(`+`, list(x$j_w_2, x$j_w_3, x$j_w_4))
sum(is.na(x$j_divert))

# recode MAIN OUTCOME variables for HMID and UI
# The *_r1 values -1, -0.5, 0, 0.5, 1 are mapped to -2, -1, 0, 1, 2. Any
# other value, including NA, is left as NA in the recoded column.

main_from <- c(-1, -0.5, 0, 0.5, 1)
main_to <- c(-2, -1, 0, 1, 2)

table(x$hm_r1, useNA = "always")

x$hm_main <- NA
for (main_pos in seq_along(main_from)) {
  x$hm_main[!is.na(x$hm_r1) & x$hm_r1 == main_from[main_pos]] <-
    main_to[main_pos]
}
table(x$hm_main, x$hm_r1, useNA = "always")

table(x$ui_r1, useNA = "always")

x$ui_main <- NA
for (main_pos in seq_along(main_from)) {
  x$ui_main[!is.na(x$ui_r1) & x$ui_r1 == main_from[main_pos]] <-
    main_to[main_pos]
}

table(x$ui_main, x$ui_r1, useNA = "always")

# Create new spillover outcome variables for HMID and UI
# The *_r2 codes 1-5 are shifted to -2..2 (code minus 3). Any other value,
# including NA, is left as NA in the recoded column.

table(x$hm_r2, useNA = "always")

x$hm_spill <- NA
for (spill_code in seq_len(5)) {
  x$hm_spill[!is.na(x$hm_r2) & x$hm_r2 == spill_code] <- spill_code - 3
}

table(x$hm_spill, x$hm_r2, useNA = "always")

table(x$ui_r2, useNA = "always")

x$ui_spill <- NA
for (spill_code in seq_len(5)) {
  x$ui_spill[!is.na(x$ui_r2) & x$ui_r2 == spill_code] <- spill_code - 3
}

table(x$ui_spill, x$ui_r2, useNA = "always")

# Recode "_s1" variable into a "Favor Status Quo" indicator named "_quo"
# _s1 equal to 3 becomes 1, _s1 equal to 1 or 2 becomes 0, and anything else
# (including NA) stays NA. %in% never returns NA, so no separate NA guard is
# needed.

table(x$hm_s1, useNA = "always")
x$hm_quo <- NA
x$hm_quo[x$hm_s1 %in% c(1, 2)] <- 0
x$hm_quo[x$hm_s1 %in% 3] <- 1
table(x$hm_quo, x$hm_s1, useNA = "always")

table(x$ui_s1, useNA = "always")
x$ui_quo <- NA
x$ui_quo[x$ui_s1 %in% c(1, 2)] <- 0
x$ui_quo[x$ui_s1 %in% 3] <- 1
table(x$ui_quo, x$ui_s1, useNA = "always")

# Code College
# college is 0 when edu <= 3 and 1 when edu >= 4. Rows with a missing edu,
# or a value strictly between 3 and 4, stay NA.
# NOTE(review): the cut-point presumes edu codes 4 and above are
# college-level -- confirm against the codebook.

table(x$edu, useNA = "always")
x$college <- NA

# Explicit NA guards, matching the other recodes in this script.
x$college[!is.na(x$edu) & x$edu <= 3] <- 0
x$college[!is.na(x$edu) & x$edu >= 4] <- 1

# Verify the recode against the source variable.
table(x$college, x$edu, useNA = "always")

# Recode HHI
# ps3 codes 1-25 are replaced by the value at the same position in
# hhi_midpoints; any other code, including NA, stays NA.
# NOTE(review): the values look like income-bracket midpoints in thousands --
# confirm against the ps3 response options.

table(x$ps3, useNA = "always")

hhi_midpoints <- c(
  5, 12.5, 17.5, 22.5, 27.5,
  32.5, 37.5, 42.5, 47.5, 52.5,
  57.5, 62.5, 67.5, 72.5, 77.5,
  82.5, 87.5, 92.5, 97.5, 112.5,
  137.5, 162.5, 187.5, 225, 250
)

x$hhimid <- NA
for (hhi_code in seq_along(hhi_midpoints)) {
  x$hhimid[!is.na(x$ps3) & x$ps3 == hhi_code] <- hhi_midpoints[hhi_code]
}

# Code Dem and Rep from PID and lean questions
# dem is 1 when pid is 1, or when pid is 3, 4 or 5 and lean is 2.
# rep is 1 when pid is 2, or when pid is 3, 4 or 5 and lean is 1.
# Every other row, including rows with a missing pid, is 0. %in% never
# returns NA, so missing pid or lean simply fails the condition.
# NOTE(review): pid codes 1/2 map to Dem/Rep while lean codes 2/1 map to
# Dem/Rep -- the order is reversed between the two items; confirm against the
# codebook.

table(x$pid, useNA = "always")
x$dem <- 0
x$dem[x$pid %in% 1 | (x$pid %in% c(3, 4, 5) & x$lean %in% 2)] <- 1
table(x$dem, x$pid, useNA = "always")
table(x$pid, x$lean, useNA = "always")

table(x$pid, useNA = "always")
x$rep <- 0
x$rep[x$pid %in% 2 | (x$pid %in% c(3, 4, 5) & x$lean %in% 1)] <- 1
table(x$rep, x$pid, useNA = "always")
table(x$pid, x$lean, useNA = "always")

# Code Ideology
# libs is 1 when ps7 is 1, 2 or 3; cons is 1 when ps7 is 5, 6 or 7. Every
# other row, including rows with a missing ps7, is 0 on both indicators.

table(x$ps7, useNA = "always")
x$libs <- 0
x$libs[x$ps7 %in% 1:3] <- 1
table(x$libs, x$ps7, useNA = "always")

x$cons <- 0
x$cons[x$ps7 %in% 5:7] <- 1
table(x$cons, x$ps7, useNA = "always")
